import requests

import json

# User-defined parameters are read from the settings module.

# settings.py is expected to define at least the cookies and the question id
# (i.e. BASE_URL, HEADERS, COOKIES, QUESTION_ID), e.g.:

# from settings import BASE_URL, HEADERS, COOKIES, QUESTION_ID

import jsonlines

from http.zhihu import *


class Zhihu(object):
    """Crawler that pages through the answers of one Zhihu question.

    Each page of answer records is appended to a jsonlines file named
    ``<QUESTION_ID>.json``.  Relies on the module-level constants
    ``BASE_URL``, ``HEADERS``, ``COOKIES`` and ``QUESTION_ID`` imported
    above (presumably from the project's settings module — confirm).
    """

    def __init__(self, offset):
        # offset is the page index; pass 0 to start from the first page.
        self.offset = offset

    def crawl(self):
        """Fetch pages until the API reports the last page.

        Iterates with a ``while`` loop instead of the original
        self-recursion, which would raise RecursionError on questions
        with more pages than Python's recursion limit (~1000).
        """
        while True:
            url = BASE_URL.format(question_id=QUESTION_ID, offset=self.offset)
            # timeout prevents the crawler from hanging forever on a
            # stalled connection (the original call had no timeout).
            req = requests.get(url, headers=HEADERS, cookies=COOKIES,
                               timeout=30)
            jsondata = req.json()
            if jsondata['paging']['is_end']:
                print('程序运行终止', )
                return
            self.save(jsondata['data'])
            self.offset += 1

    def save(self, jsondata):
        """Append every record in *jsondata* to ``<QUESTION_ID>.json``.

        Opens the file in append mode so successive pages accumulate in
        the same jsonlines file.
        """
        with jsonlines.open('{}.json'.format(QUESTION_ID), 'a') as writer:
            for jsonl in jsondata:
                print(self.offset, jsonl)
                writer.write(jsonl)
# Entry point: start crawling from the first page (offset 0).
# Guarded so that importing this module does not trigger a crawl.
if __name__ == '__main__':
    Zhihu(offset=0).crawl()